---
title: "Y1 & Y2 Pre-Survey Study Strat Cleaning and Analyses"
author: "Fernando Rodriguez"
date: "11/2/2017"
output:
  html_document: default
  pdf_document: default
editor_options: 
  chunk_output_type: console
---


```{r, echo = F}
# Installing relevant packages
library(dplyr)
library(plyr)
library(ggplot2)
library(Hmisc)
library(reshape2)

```


# Loading Data
```{r}

# Read the Year 1 study-strategy file; the first row holds the column names.
# `TRUE` is spelled out because `T` is an ordinary variable that can be
# reassigned, whereas `TRUE` is a reserved word.
y1data <- read.csv("Year1studystrat.csv", header = TRUE)

# Read the Year 2 study-strategy file the same way.
y2data <- read.csv("Year2studystrat.csv", header = TRUE)

```


## Appending Y1 and Y2 data (rows stacked; columns matched by name)
```{r}

# Stack the Year 1 and Year 2 data frames row-wise. rbind.fill() lines columns
# up by name and fills a column that exists in only one year with NA.
y12data <- plyr::rbind.fill(y1data, y2data)

```

# Data Checking

Checking for duplicate values and examining data
```{r}
# Flag every row whose roster_randomid already appeared in an earlier row
# (duplicated() leaves the first occurrence FALSE), then count the flags.
y12data[["duplicates"]] <- duplicated(y12data$roster_randomid)
table(y12data[["duplicates"]])

# Column types and a preview of the combined data, followed by the row count
# for each value of `year`.
str(y12data)
table(y12data[["year"]])
```

# Data Analysis
Self-reported behaviors at week 1 (Table 2)
```{r}
# Percentage of non-missing responses in each category of one survey item.
# table() drops NA by default; deparse.level = 0 leaves the printed table
# header blank, exactly as when table() is handed a `df$col` expression.
pct_breakdown <- function(responses) {
  100 * prop.table(table(responses, deparse.level = 0))
}

# spacing vs. cramming
pct_breakdown(y12data$pre_spacing_recode)

# self-testing
pct_breakdown(y12data$pre_ss_selftest)

# re-reading
pct_breakdown(y12data$pre_ss_reread)

# watching videos
pct_breakdown(y12data$pre_ss_videos)

# condensing notes
pct_breakdown(y12data$pre_ss_condense)

# making diagrams
pct_breakdown(y12data$pre_ss_diag)

# studying with friends
pct_breakdown(y12data$pre_ss_friends)

# highlighting text
pct_breakdown(y12data$pre_ss_high)

# flashcards
pct_breakdown(y12data$pre_ss_flashc)

# recopying notes
pct_breakdown(y12data$pre_ss_recopy)

# recopying notes from memory
pct_breakdown(y12data$pre_ss_recopymem)

# other strategies
pct_breakdown(y12data$pre_ss_other)

```




## URM Status and Study Strategies 
## Chi-square tests examining study strategies by URM status 
URM students report a lower percentage of <b>self-testing</b> use and a higher percentage of <b>flashcard</b> use than non-URM students.

```{r}


# Number of rows in each URM category (rows with a missing `urm` are not
# counted by table()).
table(y12data$urm)
# Each chisq.test() call below cross-tabulates one pre-survey item against
# URM status and runs Pearson's chi-squared test of independence on that table.
# NOTE: chisq.test() applies a continuity correction by default when the
# cross-tabulation is 2 x 2.

# spacing
chisq.test(y12data$pre_spacing_recode, y12data$urm)

# self-testing: raw counts by URM status, shown before the test
table(y12data$pre_ss_selftest, y12data$urm)

# self-testing by URM status
chisq.test(y12data$pre_ss_selftest, y12data$urm)

# rereading
chisq.test(y12data$pre_ss_reread, y12data$urm)

# watching videos
chisq.test(y12data$pre_ss_videos, y12data$urm)

# condensing or summarizing notes
chisq.test(y12data$pre_ss_condense, y12data$urm)

# making diagrams
chisq.test(y12data$pre_ss_diag, y12data$urm)

# highlight text
chisq.test(y12data$pre_ss_high, y12data$urm)

# flashcards
chisq.test(y12data$pre_ss_flashc, y12data$urm)

# study with friends
chisq.test(y12data$pre_ss_friends, y12data$urm)

# recopy notes
chisq.test(y12data$pre_ss_recopy, y12data$urm)

# recopy notes from memory
chisq.test(y12data$pre_ss_recopymem, y12data$urm)

```



URM / non-URM proportions
```{r}
# Rows for URM students only. which() discards rows where `urm` is missing,
# and drop = FALSE guarantees a data frame comes back.
studystrat_urm <- y12data[which(y12data$urm == "URM"), , drop = FALSE]
table(studystrat_urm[["urm"]])

# Rows for non-URM students only, selected the same way.
studystrat_nourm <- y12data[which(y12data$urm == "non-URM"), , drop = FALSE]
table(studystrat_nourm[["urm"]])


# Self-testing: share of each response, URM first and then non-URM
# (proportions on a 0-1 scale, not percentages).
prop.table(table(studystrat_urm[["pre_ss_selftest"]]))
prop.table(table(studystrat_nourm[["pre_ss_selftest"]]))


# Flashcards: share of each response, URM first and then non-URM.
prop.table(table(studystrat_urm[["pre_ss_flashc"]]))
prop.table(table(studystrat_nourm[["pre_ss_flashc"]]))

```




## Graph of Study Strategies by URM status
```{r}
# Bar chart of pre-survey study strategies by URM status (greyscale version).

# Keep URM status plus the spacing item and every study-strategy item.
studystratPre <- subset(y12data, select = c(urm, pre_spacing_recode,
                                           pre_ss_selftest, pre_ss_reread, pre_ss_videos,
                                           pre_ss_condense, pre_ss_diag, pre_ss_high,
                                           pre_ss_flashc, pre_ss_friends, pre_ss_recopy,
                                           pre_ss_recopymem, pre_ss_other))

table(studystratPre$pre_ss_reread)

# NOTE(review): this Hmisc label is an attribute on one column only; melt()
# presumably does not carry it into the long data, and the plot labels are set
# explicitly further down -- confirm whether this line is still needed.
label(studystratPre$pre_spacing_recode) <- "Spacing"

# Long format: one row per student and strategy item.
studystratPre_melt <- melt(studystratPre, id.vars = "urm")

# Mean, SD and standard error of each item within each URM group. Missing
# answers are dropped (na.rm = TRUE, and the SEM denominator counts only
# non-missing answers) so the plotted values agree with the table()-based
# percentages elsewhere in this file, which also ignore NA.
# NOTE(review): the means are plotted as "Proportion of Students" on a 0-1
# axis, so the items are presumably coded 0/1 -- confirm.
studystratPre_melt <- ddply(studystratPre_melt, c("urm", "variable"), summarise,
      mean = mean(value, na.rm = TRUE), sd = sd(value, na.rm = TRUE),
      sem = sd(value, na.rm = TRUE) / sqrt(sum(!is.na(value))))

table(y12data$urm)

# Legend labels with group sizes. The levels are listed explicitly: when they
# are omitted, factor() sorts the observed values, and that ordering depends on
# the locale ("URM" sorts before "non-URM" in the C locale), which would
# silently attach each label and fill colour to the wrong group. The group
# sizes are computed from the data rather than typed in, so they cannot go
# stale when the input files change.
urm_levels <- c("non-URM", "URM")
urm_n <- table(factor(y12data$urm, levels = urm_levels))
urm_labels <- paste0(urm_levels, " (n = ",
                     format(as.vector(urm_n), big.mark = ",", trim = TRUE), ")")
studystratPre_melt$urm <- factor(studystratPre_melt$urm,
                                 levels = urm_levels, labels = urm_labels)

# Reversing the factor levels does not switch the legend order per se; the
# legend is reversed with guides(fill = guide_legend(reverse = TRUE)) instead.

# Display names for the x axis, keyed by column name so that each label stays
# attached to the right item regardless of column order.
strategy_labels <- c(
  pre_spacing_recode = "Spacing",
  pre_ss_selftest    = "Self-Testing",
  pre_ss_reread      = "Re-read Chapters",
  pre_ss_videos      = "Videos",
  pre_ss_condense    = "Condense Notes",
  pre_ss_diag        = "Diagrams",
  pre_ss_high        = "Highlight/Underline",
  pre_ss_flashc      = "Flashcards",
  pre_ss_friends     = "Study with Friends",
  pre_ss_recopy      = "Recopy Notes",
  pre_ss_recopymem   = "Recopy Notes from Memory",
  pre_ss_other       = "Other"
)
studystratPre_melt$variable <- factor(studystratPre_melt$variable,
                                      levels = names(strategy_labels),
                                      labels = unname(strategy_labels))

# plotting study strategies - greyscale
# The former `order` aesthetic was dropped: ggplot2 deprecated it in 2.0.0 and
# ignores it. The two axis.text.x settings were merged into the values that
# actually took effect (size 12, angle 35, hjust 1).
# NOTE: ggplot2 >= 3.5.0 prefers legend.position = "inside" together with
# legend.position.inside; the numeric form is kept for older installations.
study_skills_grey <- ggplot(studystratPre_melt,
                            aes(x = variable, y = mean, fill = urm)) +
  geom_bar(stat = "identity", position = "dodge") +
  labs(x = "", y = "Proportion of Students", fill = "URM Status") +
  scale_y_continuous(limits = c(0, 1), breaks = c(0, .2, .4, .6, .8, 1)) +
  scale_fill_manual(values = c("#A9A9A9", "#333333")) +
  guides(fill = guide_legend(reverse = TRUE)) +
  theme(
    axis.text.x = element_text(size = 12, angle = 35, hjust = 1),
    axis.text.y = element_text(size = 10),
    legend.text = element_text(size = 14),
    legend.title = element_blank(),
    legend.position = c(.80, .5))

print(study_skills_grey)

# Pass the plot explicitly instead of relying on last_plot().
ggsave("Y1 Y2 Study Skills Grey.png", plot = study_skills_grey,
       width = 8, height = 6, units = "in")


```


```{r}

# plotting study strategies - color
# Same bar chart as the greyscale version, with the group means rescaled to
# percentages (mean * 100) and an orange/grey palette.
# The former `order` aesthetic was dropped: ggplot2 deprecated it in 2.0.0 and
# ignores it. The two axis.text.x settings were merged into the values that
# actually took effect (size 12, angle 35, hjust 1).
# NOTE: ggplot2 >= 3.5.0 prefers legend.position = "inside" together with
# legend.position.inside; the numeric form is kept for older installations.
study_skills_color <- ggplot(studystratPre_melt,
                             aes(x = variable, y = mean * 100, fill = urm)) +
  geom_bar(stat = "identity", position = "dodge") +
  labs(x = "", y = "Percentage", fill = "URM Status") +
  scale_y_continuous(limits = c(0, 100), breaks = seq(0, 100, by = 10)) +
  scale_fill_manual(values = c("#EB6F00", "#A9A9A9")) +
  guides(fill = guide_legend(reverse = TRUE)) +
  theme(
    axis.text.x = element_text(size = 12, angle = 35, hjust = 1),
    axis.text.y = element_text(size = 10),
    legend.text = element_text(size = 14),
    legend.title = element_blank(),
    legend.position = c(.80, .5))

print(study_skills_color)

# Pass the plot explicitly instead of relying on last_plot().
ggsave("Y1 Y2 Study Skills Color.png", plot = study_skills_color,
       width = 8, height = 6, units = "in")


```







## Overall study strategies pre-survey responses
## Study Decisions (Table 3)
```{r}

# Percentage of non-missing responses in each category of one study-decision
# item. deparse.level = 0 keeps the printed table header blank, as it is when
# table() receives a `df$col` expression directly.
decision_pct <- function(responses) {
  100 * prop.table(table(responses, deparse.level = 0))
}

# Keep only students with a non-negative (and therefore non-missing) answer to
# the "challenging" study-decision item. Author's note: 1224 observations.
answered_rows <- which(y12data$pre_studydecide_chall >= 0)
lm_y12_data <- y12data[answered_rows, , drop = FALSE]
table(lm_y12_data$pre_studydecide_chall, exclude = NULL)

# All respondents
decision_pct(lm_y12_data$pre_studydecide_chall)
decision_pct(lm_y12_data$pre_studydecide_soon)
decision_pct(lm_y12_data$pre_studydecide_imp)
decision_pct(lm_y12_data$pre_studydecide_sched)
decision_pct(lm_y12_data$pre_studydecide_easy)
decision_pct(lm_y12_data$pre_studydecide_interest)
decision_pct(lm_y12_data$pre_studydecide_other)


# URM respondents
lmy12_urm <- lm_y12_data[which(lm_y12_data$urm == "URM"), , drop = FALSE]
table(lmy12_urm[["urm"]])
decision_pct(lmy12_urm$pre_studydecide_chall)
decision_pct(lmy12_urm$pre_studydecide_soon)
decision_pct(lmy12_urm$pre_studydecide_imp)
decision_pct(lmy12_urm$pre_studydecide_sched)
decision_pct(lmy12_urm$pre_studydecide_easy)
decision_pct(lmy12_urm$pre_studydecide_interest)
decision_pct(lmy12_urm$pre_studydecide_other)

# non-URM respondents
lmy12_nonurm <- lm_y12_data[which(lm_y12_data$urm == "non-URM"), , drop = FALSE]
table(lmy12_nonurm[["urm"]])
decision_pct(lmy12_nonurm$pre_studydecide_chall)
decision_pct(lmy12_nonurm$pre_studydecide_soon)
decision_pct(lmy12_nonurm$pre_studydecide_imp)
decision_pct(lmy12_nonurm$pre_studydecide_sched)
decision_pct(lmy12_nonurm$pre_studydecide_easy)
decision_pct(lmy12_nonurm$pre_studydecide_interest)
decision_pct(lmy12_nonurm$pre_studydecide_other)

```



## Examining study decisions by URM status
```{r}
# challenging: column percentages (margin = 2 normalises within each URM
# group, so every URM column sums to 100), then a chi-squared test of
# independence between the item and URM status
prop.table(table(lm_y12_data$pre_studydecide_chall, lm_y12_data$urm), margin = 2)*100
chisq.test(lm_y12_data$pre_studydecide_chall, lm_y12_data$urm)

# soonest: same column percentages and chi-squared test for the "soon" item
prop.table(table(lm_y12_data$pre_studydecide_soon, lm_y12_data$urm), margin = 2)*100
chisq.test(lm_y12_data$pre_studydecide_soon, lm_y12_data$urm)

```





